# citizen forecasting 2020: a state-by-state experiment
# andreas murr & mike lewis-beck
# prepare AnesNation.RData

# remove every object in the current environment, so that nothing left over
# from an earlier session can be picked up by the code below

rm(list = ls())

# load packages

library(foreign)
library(arm)

# ======
# = ts =
# ======

# read the anes cumulative data file, keeping the numeric codes (no factors)

Anes <- read.dta("anes_cdfdta/anes_cdf.dta", convert.factors = FALSE)

# year is taken from VCF0004

Anes$year <- Anes$VCF0004

# forecast, recoded from VCF0700
# target codes: 1 = dem, 2 = rep, 3 = oth, 8 = dk, 9 = ref
# raw codes 1, 2, 7, 8 are looked up and replaced by 1, 2, 3, 8;
# any other raw value (including missing) has no match and becomes NA

Anes$forecast <- c(1, 2, 3, 8)[match(Anes$VCF0700, c(1, 2, 7, 8))]

# keep year and forecast for every fourth year from 1952 to 2008

Anes2008 <- Anes[Anes$year %in% seq(1952, 2008, by = 4), c("year", "forecast")]

# ========
# = 2012 =
# ========

# read the anes 2012 time series study, keeping the numeric codes (no factors)
# the path is relative to the working directory, like every other input file
# of this script; a leading "/" would look for the folder at the filesystem root

Anes <- read.dta("anes2012TS_dta/anes2012TS_dataset.dta", convert.factors = FALSE)

# year: this file holds a single study, so the year is a constant

Anes$year <- 2012

# forecast, recoded from preswin_win
# target codes: 1 = dem, 2 = rep, 3 = oth, 8 = dk, 9 = ref

# tabulate the raw codes as a check before recoding
# (the table is only printed when this line is run at top level)

table(Anes$preswin_win)

# raw codes 1, 2, 5, -8, -9 are looked up and replaced by 1, 2, 3, 8, 9;
# any other raw value (including missing) has no match and becomes NA

Anes$forecast <- c(1, 2, 3, 8, 9)[match(Anes$preswin_win, c(1, 2, 5, -8, -9))]

# keep year and forecast only

Anes2012 <- Anes[c("year", "forecast")]

# ========
# = 2016 =
# ========

# read the anes 2016 time series study, keeping the numeric codes (no factors)

Anes <- read.dta("anes_timeseries_2016_dta/anes_timeseries_2016_Stata12.dta", convert.factors = FALSE)

# year: this file holds a single study, so the year is a constant

Anes$year <- 2016

# forecast, recoded from V161146
# target codes: 1 = dem, 2 = rep, 3 = oth, 8 = dk, 9 = ref
# each raw code is looked up in raw.codes and replaced by the entry at the
# same position in new.codes; unmatched raw values (including missing) become NA
# NOTE(review): raw codes 3 and 4 are folded into dem and rep, and 5 to 9 into
# oth / dk / ref -- confirm these pairings against the 2016 codebook

raw.codes <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, -8, -9)
new.codes <- c(1, 2, 1, 2, 3, 3, 8, 8, 9,  8,  9)
Anes$forecast <- new.codes[match(Anes$V161146, raw.codes)]

# keep year and forecast only

Anes2016 <- Anes[, c("year", "forecast")]

# =====================
# = combine anes data =
# =====================

# stack the three extracts built above into one data frame

Anes <- do.call(rbind, list(Anes2008, Anes2012, Anes2016))

# turn the numeric forecast codes into a labelled factor:
# 1 = "d", 2 = "r", 3 = "o", 8 = "dk", 9 = "NA"
# ("NA" is an ordinary level label here, not a missing value)

Anes$forecast <- factor(
	Anes$forecast,
	levels = c(1, 2, 3, 8, 9),
	labels = c("d", "r", "o", "dk", "NA")
)

# ==========
# = nation =
# ==========

# read the national presidential election results

Nation <- read.csv("PresidentialElectionResultsNation1948to2016.csv", header = TRUE)

# democratic two-party popular and electoral vote shares

Nation$pop.dem <- Nation$pvote.d / (Nation$pvote.d + Nation$pvote.r)
Nation$ele.dem <- Nation$evote.d / (Nation$evote.d + Nation$evote.r)

# incumbent two-party popular and electoral vote shares:
# the republican share where incumbent is "r", otherwise the democratic share
# computed above (reused as is, so both columns hold identical values there)

Nation$pop.inc <- ifelse(
	Nation$incumbent == "r",
	Nation$pvote.r / (Nation$pvote.d + Nation$pvote.r),
	Nation$pop.dem
)
Nation$ele.inc <- ifelse(
	Nation$incumbent == "r",
	Nation$evote.r / (Nation$evote.d + Nation$evote.r),
	Nation$ele.dem
)

# keep only the variables needed below, in this order

keep.vars <- c("year", "elected", "pop.dem", "ele.dem", "pop.inc", "ele.inc")
Nation <- Nation[keep.vars]

# ======================================================
# = aggregate anes data and combine with national data =
# ======================================================

# number of respondents in a given year whose forecast equals a given party;
# rows with a missing year or a missing forecast are not counted

count.forecasts <- function(year, party) {
	sum(Anes$year == year & Anes$forecast == party, na.rm = TRUE)
}

# democratic ("d") and republican ("r") forecasts for every year in Nation;
# vapply over the year column also works when Nation has no rows, where a
# loop over 1:nrow(Nation) would run for i = 1 and i = 0 and fail

n.dem <- vapply(Nation$year, count.forecasts, integer(1), party = "d")
n.rep <- vapply(Nation$year, count.forecasts, integer(1), party = "r")

# p.dem = share of two-party forecasts that name the democrat
# n     = number of two-party forecasts (other, dk and ref are left out)
# years without any two-party forecast give 0 / 0, so p.dem is NaN there

Nation$p.dem <- n.dem / (n.dem + n.rep)
Nation$n <- n.dem + n.rep

# p.inc = share of two-party forecasts that name the incumbent party
# pop.dem equals pop.inc exactly where the incumbent share was computed as the
# democratic share, so the comparison picks out those years
# NOTE(review): an exact 50/50 two-party popular vote would also compare equal

Nation$p.inc <- ifelse(Nation$pop.dem == Nation$pop.inc, Nation$p.dem, 1 - Nation$p.dem)

# forecast = party named by the majority of two-party forecasts,
# "t" for an exact tie, NA where p.dem is NaN

Nation$forecast <- ifelse(Nation$p.dem > .5, "d", ifelse(Nation$p.dem == .5, "t", "r"))

# =============
# = save data =
# =============

# store the combined data under the name AnesNation and write it to disk

AnesNation <- Nation
save(list = "AnesNation", file = "AnesNation.RData")

# ===================
# = end source code =
# ===================